#Loading Packages
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Load the World Population dataset.
# NOTE(review): the path is absolute and machine-specific.
df = pd.read_csv("C:/Users/Nathan/Documents/Portfolio/Portfolio Data/World Population EDA/world_population.csv")

# Inspect the dtype pandas inferred for every column (bare expression, so it
# only displays something when run interactively).
df.dtypes

# Rank, CCA3, Country, Capital and Continent are converted to the categorical
# dtype; every other column keeps the dtype inferred by read_csv.
for label_column in ('Rank', 'CCA3', 'Country', 'Capital', 'Continent'):
    df[label_column] = df[label_column].astype('category')
# Count the non-null entries of each column (axis=0). Despite its name,
# `missing` holds the number of values that are PRESENT: a column whose count
# is lower than the number of rows contains missing values.
missing = df.notnull().sum(axis=0)

fig = px.bar(
    x=missing.index,
    y=missing.values,
    text=missing.values,
    # Take the row count from the data instead of hard-coding it, so the title
    # stays correct if the CSV gains or loses rows.
    title='World Population Dataset: Total Number of Data Points (out of {} rows)'.format(len(df)),
)
fig.update_traces(hovertemplate='<br> Variable: %{x} </br> Number of Instances: %{y}')
fig.update_layout(xaxis_title='Dataset Variables', yaxis_title='Number of Instances')
fig.show()
# Number of Country entries recorded for each continent.
country_counts = df.groupby('Continent')['Country'].count()

# One bar per continent, coloured per continent with the reversed Peach scale.
fig = px.bar(
    x=country_counts.index,
    y=country_counts.values,
    text=country_counts.values,
    color=country_counts.index,
    color_discrete_sequence=px.colors.sequential.Peach[::-1],
    title='World Population Dataset: Countries per Continent',
)
fig.update_layout(
    xaxis_title='Continents',
    yaxis_title='Number of Countries',
)
fig.update_traces(
    hovertemplate='<br> Continent: %{x} </br> Number of Countries Included: %{y}',
)
fig.show()
populations = ['2022 Population']
for pop in populations:
    # Mean of the per-country populations within each continent, rounded and
    # ordered from largest to smallest.
    continent_populations = (
        df.groupby('Continent')[pop]
        .mean()
        .round()
        .sort_values(ascending=False)
    )

    # Bar chart of the continent averages.
    fig = px.bar(
        x=continent_populations.index,
        y=continent_populations.values,
        text=continent_populations.values,
        color=continent_populations,
        color_continuous_scale=px.colors.sequential.Darkmint,
        title='Average ' + pop + ' by Continent',
    )
    fig.update_traces(
        hovertemplate='<br> Continent: %{x} </br> Average Population: %{y}',
    )
    fig.update_layout(
        xaxis_title='Continents',
        yaxis_title='Population Count',
    )
    fig.show()

    # Data-frame copy of the averages with the continent as a regular column.
    # NOTE(review): `dummy` is not read by any code visible here.
    dummy = continent_populations.to_frame().reset_index()

    # Donut chart built from the per-country rows of `df`, with `pop` as the
    # slice values and 'Continent' as the slice names.
    fig = px.pie(
        df,
        names='Continent',
        values=pop,
        hole=0.25,
        color_discrete_sequence=px.colors.sequential.Darkmint[::-1],
        title='Total ' + pop + ' by Continent',
    )
    fig.update_traces(textinfo='label+percent+value')
    fig.show()
populations = ['2020 Population','2010 Population','2000 Population','1990 Population','1980 Population','1970 Population']
for pop in populations:
    # Aggregate the column for THIS year. The previous version plotted the
    # `continent_populations` series left over from the 2022 loop, so every
    # "Average <year> Population" panel showed the 2022 averages.
    population_by_continent = df.groupby('Continent')[pop]
    average_by_continent = population_by_continent.mean().round().sort_values(ascending=False)
    total_by_continent = population_by_continent.sum().sort_values(ascending=False)

    # Left panel (70% width): bar chart of averages; right panel: donut of totals.
    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'xy'}, {'type': 'domain'}]],
        subplot_titles=['Average ' + pop, 'Total ' + pop],
        horizontal_spacing=0.2,
        column_widths=[0.7, 0.3],
    )
    fig.add_trace(
        go.Bar(
            x=average_by_continent.index,
            y=average_by_continent.values,
            showlegend=False,
            name='',
            marker=dict(color=px.colors.sequential.Darkmint[::-1]),
            texttemplate='%{y}',
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Pie(
            values=total_by_continent,
            labels=total_by_continent.index,
            name='',
            marker=dict(colors=px.colors.sequential.Darkmint[::-1]),
            texttemplate='<br>%{label}</br>%{percent}</br>%{value}',
            textposition='outside',
            hole=0.25,
        ),
        row=1,
        col=2,
    )
    fig.update_layout(title_text=pop + ' Demographics by Continent')
    fig.show()
populations = ['2022 Population']
for pop in populations:
    # Population per country for the selected column.
    country_totals = df.groupby('Country')[pop].sum()

    # Five largest, in descending order.
    mostpop_country = country_totals.sort_values(ascending=False).head(5)
    # Five smallest, then re-ordered so they are also plotted in descending order.
    leastpop_country = (
        country_totals.sort_values(ascending=True)
        .head(5)
        .sort_values(ascending=False)
    )

    hover_text = '<br> Country: %{x} </br> ' + pop + ': %{y}'

    # Most populated countries.
    fig = px.bar(
        x=mostpop_country.index,
        y=mostpop_country.values,
        text=mostpop_country.values,
        color_discrete_sequence=['MidnightBlue'],
        title=pop + ': Top 5 Most Populated Countries',
    )
    fig.update_traces(hovertemplate=hover_text)
    fig.update_layout(xaxis_title='Country', yaxis_title='Population')
    fig.show()

    # Least populated countries.
    fig = px.bar(
        x=leastpop_country.index,
        y=leastpop_country.values,
        text=leastpop_country.values,
        color_discrete_sequence=['IndianRed'],
        title=pop + ': Top 5 Least Populated Countries',
    )
    fig.update_traces(hovertemplate=hover_text)
    fig.update_layout(xaxis_title='Country', yaxis_title='Population')
    fig.show()
populations = ['2020 Population','2010 Population','2000 Population','1990 Population','1980 Population','1970 Population']
for pop in populations:
    # Rank the countries for THIS year. The previous version reused the
    # `mostpop_country` / `leastpop_country` series computed for 2022, so every
    # figure showed the 2022 ranking under a different year's title.
    country_totals = df.groupby('Country')[pop].sum()
    most_populated = country_totals.sort_values(ascending=False).head(5)
    # Five smallest, re-sorted so both panels read from largest to smallest.
    least_populated = (
        country_totals.sort_values(ascending=True)
        .head(5)
        .sort_values(ascending=False)
    )

    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'xy'}, {'type': 'xy'}]],
        subplot_titles=['Top 5 Most Populated Countries', 'Top 5 Least Populated Countries'],
        y_title='Population',
        x_title='Countries',
    )
    fig.add_trace(
        go.Bar(
            x=most_populated.index,
            y=most_populated.values,
            texttemplate='%{y}',
            name='',
            showlegend=False,
            marker=dict(color='MidnightBlue'),
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Bar(
            x=least_populated.index,
            y=least_populated.values,
            texttemplate='%{y}',
            name='',
            showlegend=False,
            marker=dict(color='IndianRed'),
        ),
        row=1,
        col=2,
    )
    fig.update_layout(title_text=pop)
    fig.show()
# Population columns to animate over, reversed in place so they run from 1970 to 2020.
y = ['2020 Population','2010 Population', '2000 Population', '1990 Population','1980 Population', '1970 Population']
y.reverse()

# Per-country mean of only the population columns that are needed.
# Aggregating the whole frame (`df.groupby('Country').mean()`) also tries to
# average the categorical columns, which raises TypeError on pandas >= 2.0;
# selecting the columns first also makes the later drop() unnecessary.
x = df.groupby('Country')[y].mean()
x.reset_index(inplace=True)

# Reshape wide -> long: one row per (Country, Year) with its Population.
x = pd.melt(x, id_vars=['Country'], value_vars=y)
x = x.rename(columns={'variable': 'Year', 'value': 'Population'})

# Strip the column-name suffix so 'Year' holds just the year text, e.g. '1970'.
# regex=False: the pattern is a literal substring, not a regular expression.
x['Year'] = x['Year'].str.replace(' Population', '', regex=False)
# Animated world choropleth: one frame per value of 'Year' in the long-format
# table `x`, coloured by 'Population' on a fixed 0 - 1e9 colour range.
fig = px.choropleth(
    data_frame=x,
    locations='Country',
    locationmode='country names',
    color='Population',
    animation_frame='Year',
    range_color=(0, 1000000000),
    color_continuous_scale=px.colors.sequential.RdBu[::-1],
    projection='natural earth',
    template='ggplot2',
    title='World Map of Populations: 1970 - 2020',
)

# Graticule plus light-blue oceans and lakes.
world_geo_style = dict(
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="LightBlue",
)
fig.update_geos(**world_geo_style)
# Rendering is disabled here, as in the original script.
#fig.show(renderer='notebook')
# 3x2 grid of world choropleths, one panel per decade, filled row by row.
panel_years = ['1970', '1980', '1990', '2000', '2010', '2020']
fig = make_subplots(
    rows=3,
    cols=2,
    specs=[[{'type': 'choropleth'}, {'type': 'choropleth'}] for _ in range(3)],
    subplot_titles=panel_years,
    vertical_spacing=0,
)

for panel_index, panel_year in enumerate(panel_years):
    # Panel 0 -> (row 1, col 1), panel 1 -> (row 1, col 2), panel 2 -> (row 2, col 1), ...
    grid_row, grid_col = divmod(panel_index, 2)
    fig.add_trace(
        go.Choropleth(
            locations=df['Country'],
            locationmode='country names',
            z=df[panel_year + ' Population'],
            colorscale=px.colors.sequential.RdBu[::-1],
            # Same 0 - 1e9 colour range in every panel.
            zmin=0,
            zmax=1000000000,
        ),
        row=grid_row + 1,
        col=grid_col + 1,
    )

# Shared map styling applied to every geo subplot.
fig.update_geos(
    projection_type='natural earth',
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="LightBlue",
)
fig.update_layout(
    width=1500,
    height=1500,
    title_text='World Population Map of the Last 50 Years by Country',
)
fig.show()
# NOTE(review): `Continents` is not read by any code visible here; it is kept
# in case later code depends on it.
Continents = ['asia']

# Animated choropleth restricted to Asia: one frame per value of 'Year' in the
# long-format table `x`, coloured by 'Population' on a fixed 0 - 1e9 range.
fig = px.choropleth(
    data_frame=x,
    locations='Country',
    locationmode='country names',
    color='Population',
    animation_frame='Year',
    color_continuous_scale=px.colors.sequential.RdBu[::-1],
    range_color=(0, 1000000000),
    # The map colours by raw population counts, not by density, so the title
    # no longer says "Population Density".
    title='Asia Population Map: 1970 - 2020',
    template='ggplot2',
    scope='asia',
)
# Graticule plus light-blue oceans and lakes.
fig.update_geos(
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="LightBlue",
)
fig.show()
# 3x2 grid of choropleths scoped to Asia, one panel per decade, filled row by row.
panel_years = ['1970', '1980', '1990', '2000', '2010', '2020']
fig = make_subplots(
    rows=3,
    cols=2,
    specs=[[{'type': 'choropleth'}, {'type': 'choropleth'}] for _ in range(3)],
    subplot_titles=panel_years,
    vertical_spacing=0,
)

for panel_index, panel_year in enumerate(panel_years):
    # Panel 0 -> (row 1, col 1), panel 1 -> (row 1, col 2), panel 2 -> (row 2, col 1), ...
    grid_row, grid_col = divmod(panel_index, 2)
    fig.add_trace(
        go.Choropleth(
            locations=df['Country'],
            locationmode='country names',
            z=df[panel_year + ' Population'],
            colorscale=px.colors.sequential.RdBu[::-1],
            # Same 0 - 1e9 colour range in every panel.
            zmin=0,
            zmax=1000000000,
        ),
        row=grid_row + 1,
        col=grid_col + 1,
    )

# Shared map styling; scope='asia' is applied to every geo subplot.
fig.update_geos(
    projection_type='natural earth',
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="LightBlue",
    scope='asia',
)
fig.update_layout(
    width=1500,
    height=1500,
    title_text='Map of Asia for the Last 50 Years',
)
fig.show()
# World choropleth coloured by each country's 'Area (km²)' column.
area_map_options = dict(
    data_frame=df,
    locations='Country',
    locationmode='country names',
    color='Area (km²)',
    color_continuous_scale=px.colors.sequential.Bluyl,
    title='Total Land Mass by Country',
    template='ggplot2',
    projection='natural earth',
)
fig = px.choropleth(**area_map_options)

# Graticule plus light-blue oceans and lakes.
fig.update_geos(
    showlakes=True,
    lakecolor="LightBlue",
    showocean=True,
    oceancolor="LightBlue",
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
)
fig.show()
# World choropleth coloured by the 'Density (per km²)' column, with the
# colour range fixed to 0 - 150.
density_map_options = dict(
    data_frame=df,
    locations='Country',
    locationmode='country names',
    color='Density (per km²)',
    range_color=(0, 150),
    color_continuous_scale=px.colors.sequential.OrRd,
    title='Total Population Density by Country',
    template='ggplot2',
    projection='natural earth',
)
fig = px.choropleth(**density_map_options)

# Graticule plus light-blue oceans and lakes.
fig.update_geos(
    showlakes=True,
    lakecolor="LightBlue",
    showocean=True,
    oceancolor="LightBlue",
    lataxis_showgrid=True,
    lonaxis_showgrid=True,
)

# Keep a second reference to this figure under the name `example`.
example = fig
fig.show()